library(ade4) #Librairie qui permet l'implentation de fonctions statistiques et graphiques
library(FactoMineR) #Il permet de réaliser des analyses classiques telles que l'analyse en composantes principales (ACP), l'analyse des correspondances (AC), l'analyse des correspondances multiples (ACM) ainsi que des analyses plus avancées.
##
## Attaching package: 'FactoMineR'
## The following object is masked from 'package:ade4':
##
## reconst
library(glmnet) #Permet d'ajuster l'ensemble du chemin de régularisation lasso ou élastique-net pour la régression linéaire
## Loading required package: Matrix
## Loaded glmnet 4.1-1
library(corrplot)#Permet de visualiser une matrice de corrélation par corrélogrammee
## corrplot 0.84 loaded
library(pls)
##
## Attaching package: 'pls'
## The following object is masked from 'package:corrplot':
##
## corrplot
## The following object is masked from 'package:stats':
##
## loadings
#Importation des données
library(readr)
# Read the housing data set; read.delim() is called with its default settings.
logtsDK <- read.delim(file = "logtsDK.csv")

# Split the predictors by position: columns 3-12 form the "taille" (size)
# block, columns 13-29 hold the qualitative variables.
logtsDK_taille <- logtsDK[, 3:12]
logtsDK_fact <- logtsDK[, 13:29]

# Turn every qualitative variable into indicator columns, then bind those
# indicators to the right of the size block.
logtsDKnomIndic <- acm.disjonctif(logtsDK_fact)
LDK <- cbind(logtsDK_taille, logtsDKnomIndic)
## Ordinary least squares (OLS) fit of the rent directly on the size variables:
# Convert the size block to a matrix so that it enters the formula as a single
# matrix term (one coefficient per column).
logtsDK_taille<-as.matrix(logtsDK_taille)
# NOTE(review): the formula names workspace objects directly (logtsDK$Loyer,
# logtsDK_taille), so the `data` argument does not appear to supply any term.
MCO_taille<- lm(logtsDK$Loyer~logtsDK_taille,data= as.data.frame(logtsDK))
# The printed summary reports two coefficients as NA because of singularities.
summary(MCO_taille)
##
## Call:
## lm(formula = logtsDK$Loyer ~ logtsDK_taille, data = as.data.frame(logtsDK))
##
## Residuals:
## Min 1Q Median 3Q Max
## -305.39 -125.68 -42.34 99.10 913.05
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 430.0836 421.0314 1.022 0.31468
## logtsDK_tailleSurfTerrain 1.2765 0.4273 2.987 0.00536 **
## logtsDK_tailleSurfHabitable 2.9112 2.9678 0.981 0.33400
## logtsDK_tailleSurfPiecResid -29.9119 16.7734 -1.783 0.08403 .
## logtsDK_tailleNbPieces -177.3241 225.0162 -0.788 0.43647
## logtsDK_tailleNbPiecesResid 254.2928 300.7181 0.846 0.40404
## logtsDK_tailleNbSDB 224.3603 240.5155 0.933 0.35789
## logtsDK_tailleNbChamBur -209.9237 99.3588 -2.113 0.04251 *
## logtsDK_tailleNbSalonsSAM NA NA NA NA
## logtsDK_tailleNbWC 384.2636 218.9312 1.755 0.08880 .
## logtsDK_tailleNbCuis NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 239.8 on 32 degrees of freedom
## Multiple R-squared: 0.7541, Adjusted R-squared: 0.6926
## F-statistic: 12.26 on 8 and 32 DF, p-value: 7.887e-08
## PCA theme by theme:
# Size theme: FactoMineR PCA of the size block (called with default settings).
PCA1<-PCA(logtsDK_taille)
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps


# Redraw the variables plot only (choix = "var").
plot.PCA(PCA1,choix = "var")

# Squared cosines of the variables on the retained dimensions.
PCA1$var$cos2
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## SurfTerrain 0.6957596 0.0023491231 0.147534287 0.1007699984 1.035906e-03
## SurfHabitable 0.9652200 0.0075977653 0.003656496 0.0041111908 6.947176e-03
## SurfPiecResid 0.5601628 0.0668936381 0.289068886 0.0536207377 1.248462e-02
## NbPieces 0.9676251 0.0004346449 0.030462967 0.0001964992 7.727818e-05
## NbPiecesResid 0.9137550 0.0030160610 0.068883614 0.0002818111 9.413272e-03
## NbSDB 0.8300173 0.0054225362 0.000296253 0.0427220693 4.366669e-02
## NbChamBur 0.8453092 0.0013572035 0.096734857 0.0081218865 4.729963e-02
## NbSalonsSAM 0.7644196 0.0711421011 0.009720707 0.0739533555 4.454982e-02
## NbWC 0.7980084 0.0582834622 0.002742312 0.0059807346 9.595988e-03
## NbCuis 0.1360579 0.8006677932 0.041336963 0.0006572742 3.691325e-03
# pls is attached after corrplot and masks corrplot(), so the bare call would
# dispatch to pls::corrplot (a correlation-loadings plot). Call the corrplot
# package explicitly; cos2 values are not correlations, hence is.corr = FALSE.
corrplot::corrplot(PCA1$var$cos2, is.corr = FALSE)

##Valeurs propres:
PCA1$eig
## eigenvalue percentage of variance cumulative percentage of variance
## comp 1 7.476335e+00 7.476335e+01 74.76335
## comp 2 1.017164e+00 1.017164e+01 84.93499
## comp 3 6.904373e-01 6.904373e+00 91.83937
## comp 4 2.904156e-01 2.904156e+00 94.74352
## comp 5 1.787617e-01 1.787617e+00 96.53114
## comp 6 1.754505e-01 1.754505e+00 98.28564
## comp 7 1.597364e-01 1.597364e+00 99.88301
## comp 8 1.169930e-02 1.169930e-01 100.00000
## comp 9 3.136885e-30 3.136885e-29 100.00000
## comp 10 2.524442e-32 2.524442e-31 100.00000
barplot(PCA1$eig[,2])

# Second PCA of the size block, this time with prcomp().
# NOTE(review): no `scale.` argument is passed, so this run uses the columns in
# their original units; the standard deviations it prints (about 217, 53,
# 4.3, ...) are therefore not comparable with the PCA1 eigenvalues, which sum
# to the number of variables. Confirm the unscaled run is intended.
pca1 = prcomp(logtsDK_taille)
## NOTE(review): the original heading here read "growth of the eigenvalues",
## but no eigenvalue is printed by the next statement.
pca1$rotation ## Loadings: weight of each variable in each principal component
## PC1 PC2 PC3 PC4
## SurfTerrain -0.9015978499 4.324693e-01 -0.009169902 -0.001711798
## SurfHabitable -0.4312907882 -8.991760e-01 -0.018069683 0.070191734
## SurfPiecResid -0.0253149027 -3.480241e-02 0.933374069 -0.341462769
## NbPieces -0.0167691961 -4.159075e-02 -0.243539707 -0.751182081
## NbPiecesResid -0.0102744528 -3.000054e-02 -0.201291721 -0.374396168
## NbSDB -0.0031195791 -4.901760e-03 -0.023623484 -0.209235806
## NbChamBur -0.0073873933 -2.181098e-02 -0.162214303 -0.329431970
## NbSalonsSAM -0.0028870594 -8.189555e-03 -0.039077418 -0.044964198
## NbWC -0.0030333584 -6.704037e-03 -0.008944071 -0.136986969
## NbCuis -0.0003418059 1.558701e-05 -0.009680432 -0.030563138
## PC5 PC6 PC7 PC8
## SurfTerrain -0.0007162984 -0.001429731 -0.001223207 0.0007223059
## SurfHabitable 0.0013259133 0.011541967 0.007923196 -0.0044300523
## SurfPiecResid -0.0376762163 -0.088164094 -0.030592031 0.0153523845
## NbPieces 0.2724491793 0.190508482 0.019356024 -0.2218569125
## NbPiecesResid -0.0841160278 -0.519627551 -0.175089739 0.2863344421
## NbSDB 0.1233769467 0.283891985 0.769203385 0.2320613238
## NbChamBur -0.7018008002 -0.116556805 -0.035405338 0.0111605355
## NbSalonsSAM 0.6176847725 -0.403070746 -0.139684401 0.2751739065
## NbWC 0.0965307868 0.624398932 -0.595888538 0.1119792231
## NbCuis 0.1366574735 -0.198154883 0.021130916 -0.8522319016
## PC9 PC10
## SurfTerrain 2.803548e-18 -1.654866e-18
## SurfHabitable 5.060608e-18 1.311200e-17
## SurfPiecResid -4.768563e-17 -3.850923e-18
## NbPieces -3.772700e-01 2.682407e-01
## NbPiecesResid -8.304163e-02 -6.493655e-01
## NbSDB 3.772700e-01 -2.682407e-01
## NbChamBur 4.603116e-01 3.811248e-01
## NbSalonsSAM 4.603116e-01 3.811248e-01
## NbWC 3.772700e-01 -2.682407e-01
## NbCuis 3.772700e-01 -2.682407e-01
pca1$sdev ##Les écarts-types "bruts"
## [1] 2.165710e+02 5.324754e+01 4.314768e+00 1.082570e+00 6.356562e-01
## [6] 4.952575e-01 3.589661e-01 1.834611e-01 6.331211e-16 1.297068e-16
## Principal component regression (size theme): the rent is regressed on the
## coordinates of the individuals stored in PCA1$ind$coord (the summary shows
## five dimensions, Dim.1 to Dim.5).
pcr1<- lm(logtsDK$Loyer~PCA1$ind$coord,data= as.data.frame(logtsDK))
summary(pcr1)
##
## Call:
## lm(formula = logtsDK$Loyer ~ PCA1$ind$coord, data = as.data.frame(logtsDK))
##
## Residuals:
## Min 1Q Median 3Q Max
## -436.25 -184.03 -10.35 121.94 908.92
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 356.05 40.87 8.712 2.75e-10 ***
## PCA1$ind$coordDim.1 117.40 14.95 7.854 3.13e-09 ***
## PCA1$ind$coordDim.2 -35.28 40.52 -0.871 0.3899
## PCA1$ind$coordDim.3 78.97 49.19 1.605 0.1174
## PCA1$ind$coordDim.4 -105.36 75.84 -1.389 0.1735
## PCA1$ind$coordDim.5 -260.68 96.66 -2.697 0.0107 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 261.7 on 35 degrees of freedom
## Multiple R-squared: 0.6796, Adjusted R-squared: 0.6338
## F-statistic: 14.85 on 5 and 35 DF, p-value: 7.961e-08
##On remarque qu'après la régression sur les composantes principales, les plus utiles sont la première et la cinquième (la dernière des cinq composantes conservées dans PCA1$ind$coord).
## OLS restricted to principal components 1 and 5 of the size theme
## (columns 1 and 5 of PCA1$ind$coord):
MCO_taille2<- lm(logtsDK$Loyer~PCA1$ind$coord[,c(1,5)],data= as.data.frame(logtsDK))
summary(MCO_taille2)
##
## Call:
## lm(formula = logtsDK$Loyer ~ PCA1$ind$coord[, c(1, 5)], data = as.data.frame(logtsDK))
##
## Residuals:
## Min 1Q Median 3Q Max
## -471.72 -166.46 -2.39 128.45 865.44
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 356.05 42.07 8.463 2.84e-10 ***
## PCA1$ind$coord[, c(1, 5)]Dim.1 117.40 15.39 7.630 3.51e-09 ***
## PCA1$ind$coord[, c(1, 5)]Dim.5 -260.68 99.50 -2.620 0.0126 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 269.4 on 38 degrees of freedom
## Multiple R-squared: 0.6314, Adjusted R-squared: 0.612
## F-statistic: 32.54 on 2 and 38 DF, p-value: 5.826e-09
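##Il est plus intéressant de faire la régression sur les deux composantes que sur une seule. Les composantes principales
##étant non corrélées entre elles, il n'y a pas de confusion entre leurs effets : les coefficients estimés sont les mêmes que dans la régression sur les cinq composantes, et la significativité de chacune est sa vraie significativité.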
## Quality theme:
# Columns 11 to 33 of LDK are taken as the quality indicator columns.
logtsDK_qualite <- LDK[, 11:33]
# Spell the argument out as `scale.unit`: the previous `scale = TRUE` only
# reached it through partial argument matching.
PCA2 <- PCA(logtsDK_qualite, scale.unit = TRUE)


# pls is attached after corrplot and masks corrplot(), so the bare call would
# dispatch to pls::corrplot (a correlation-loadings plot). Call the corrplot
# package explicitly; cos2 values are not correlations, hence is.corr = FALSE.
corrplot::corrplot(PCA2$var$cos2, is.corr = FALSE)

##Valeurs propres:
PCA2$eig
## eigenvalue percentage of variance cumulative percentage of variance
## comp 1 7.379811e+00 3.208613e+01 32.08613
## comp 2 3.105184e+00 1.350080e+01 45.58693
## comp 3 2.523830e+00 1.097317e+01 56.56011
## comp 4 1.885202e+00 8.196528e+00 64.75663
## comp 5 1.551232e+00 6.744489e+00 71.50112
## comp 6 1.387970e+00 6.034654e+00 77.53578
## comp 7 1.211577e+00 5.267725e+00 82.80350
## comp 8 1.073695e+00 4.668239e+00 87.47174
## comp 9 9.157240e-01 3.981409e+00 91.45315
## comp 10 7.103919e-01 3.088660e+00 94.54181
## comp 11 6.577247e-01 2.859673e+00 97.40148
## comp 12 2.544728e-01 1.106403e+00 98.50789
## comp 13 2.322656e-01 1.009851e+00 99.51774
## comp 14 1.109207e-01 4.822639e-01 100.00000
## comp 15 4.363338e-30 1.897104e-29 100.00000
## comp 16 6.547955e-31 2.846937e-30 100.00000
## comp 17 1.824182e-31 7.931227e-31 100.00000
## comp 18 1.084392e-31 4.714747e-31 100.00000
## comp 19 6.367497e-32 2.768477e-31 100.00000
## comp 20 4.914188e-32 2.136604e-31 100.00000
## comp 21 3.434781e-32 1.493383e-31 100.00000
## comp 22 2.695648e-32 1.172021e-31 100.00000
## comp 23 1.002895e-32 4.360411e-32 100.00000
barplot(PCA2$eig[,2])

## Principal component regression (quality theme): the rent is regressed on the
## coordinates of the individuals stored in PCA2$ind$coord.
pcr2<- lm(logtsDK$Loyer~PCA2$ind$coord,data= as.data.frame(logtsDK))
summary(pcr2)
##
## Call:
## lm(formula = logtsDK$Loyer ~ PCA2$ind$coord, data = as.data.frame(logtsDK))
##
## Residuals:
## Min 1Q Median 3Q Max
## -490.29 -116.25 -59.55 21.36 1202.08
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 356.05 49.33 7.217 2.01e-08 ***
## PCA2$ind$coordDim.1 111.17 18.16 6.122 5.34e-07 ***
## PCA2$ind$coordDim.2 5.61 28.00 0.200 0.842
## PCA2$ind$coordDim.3 -10.22 31.05 -0.329 0.744
## PCA2$ind$coordDim.4 52.77 35.93 1.469 0.151
## PCA2$ind$coordDim.5 16.91 39.61 0.427 0.672
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 315.9 on 35 degrees of freedom
## Multiple R-squared: 0.5331, Adjusted R-squared: 0.4664
## F-statistic: 7.993 on 5 and 35 DF, p-value: 4.147e-05
## OLS of the rent on the first principal component of the quality theme only
## (column 1 of PCA2$ind$coord):
MCO_qualite<- lm(logtsDK$Loyer~PCA2$ind$coord[,1],data= as.data.frame(logtsDK))
summary(MCO_qualite)
##
## Call:
## lm(formula = logtsDK$Loyer ~ PCA2$ind$coord[, 1], data = as.data.frame(logtsDK))
##
## Residuals:
## Min 1Q Median 3Q Max
## -467.03 -141.66 -56.32 28.14 1297.99
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 356.05 48.37 7.361 6.87e-09 ***
## PCA2$ind$coord[, 1] 111.17 17.80 6.244 2.37e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 309.7 on 39 degrees of freedom
## Multiple R-squared: 0.4999, Adjusted R-squared: 0.4871
## F-statistic: 38.99 on 1 and 39 DF, p-value: 2.374e-07
## Location ("Situation") theme:
# Indicator columns 34 through 50 of LDK make up this block; run the same
# FactoMineR PCA on it as for the other themes.
logtsDK_situation <- LDK[, seq.int(34, 50)]
PCA3 <- PCA(X = logtsDK_situation)
## Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps


# pls is attached after corrplot and masks corrplot(), so the bare call would
# dispatch to pls::corrplot (a correlation-loadings plot). Call the corrplot
# package explicitly; cos2 values are not correlations, hence is.corr = FALSE.
corrplot::corrplot(PCA3$var$cos2, is.corr = FALSE)

##Valeurs propres:
PCA3$eig
## eigenvalue percentage of variance cumulative percentage of variance
## comp 1 4.601369e+00 2.706688e+01 27.06688
## comp 2 4.061235e+00 2.388962e+01 50.95650
## comp 3 2.429751e+00 1.429265e+01 65.24915
## comp 4 2.200733e+00 1.294549e+01 78.19463
## comp 5 1.242892e+00 7.311132e+00 85.50577
## comp 6 1.020835e+00 6.004912e+00 91.51068
## comp 7 5.795349e-01 3.409029e+00 94.91971
## comp 8 4.129103e-01 2.428884e+00 97.34859
## comp 9 3.013460e-01 1.772624e+00 99.12121
## comp 10 1.493937e-01 8.787865e-01 100.00000
## comp 11 2.041883e-30 1.201108e-29 100.00000
## comp 12 6.078277e-31 3.575457e-30 100.00000
## comp 13 2.049065e-31 1.205333e-30 100.00000
## comp 14 1.091975e-31 6.423380e-31 100.00000
## comp 15 5.527018e-32 3.251187e-31 100.00000
## comp 16 2.333037e-32 1.372375e-31 100.00000
## comp 17 7.061867e-33 4.154039e-32 100.00000
barplot(PCA3$eig[,2])

## Principal component regression (location theme): the rent is regressed on
## the coordinates of the individuals stored in PCA3$ind$coord.
pcr3<- lm(logtsDK$Loyer~PCA3$ind$coord,data= as.data.frame(logtsDK))
summary(pcr3)
##
## Call:
## lm(formula = logtsDK$Loyer ~ PCA3$ind$coord, data = as.data.frame(logtsDK))
##
## Residuals:
## Min 1Q Median 3Q Max
## -605.80 -121.34 -11.22 82.76 1294.20
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 356.05 50.29 7.080 3.01e-08 ***
## PCA3$ind$coordDim.1 126.92 23.44 5.414 4.58e-06 ***
## PCA3$ind$coordDim.2 -19.05 24.95 -0.764 0.4502
## PCA3$ind$coordDim.3 -82.66 32.26 -2.562 0.0149 *
## PCA3$ind$coordDim.4 -23.54 33.90 -0.695 0.4919
## PCA3$ind$coordDim.5 -20.56 45.11 -0.456 0.6513
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 322 on 35 degrees of freedom
## Multiple R-squared: 0.5149, Adjusted R-squared: 0.4456
## F-statistic: 7.43 on 5 and 35 DF, p-value: 7.727e-05
## OLS on principal components 1 and 3 of PCA3.
## NOTE(review): the object is called MCO_surface, yet PCA3 is the PCA of the
## location ("Situation") block, not of the size block.
MCO_surface<- lm(logtsDK$Loyer~PCA3$ind$coord[,c(1,3)],data= as.data.frame(logtsDK))
summary(MCO_surface)
##
## Call:
## lm(formula = logtsDK$Loyer ~ PCA3$ind$coord[, c(1, 3)], data = as.data.frame(logtsDK))
##
## Residuals:
## Min 1Q Median 3Q Max
## -534.04 -123.33 -10.44 105.96 1365.96
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 356.05 49.13 7.247 1.14e-08 ***
## PCA3$ind$coord[, c(1, 3)]Dim.1 126.92 22.90 5.541 2.42e-06 ***
## PCA3$ind$coord[, c(1, 3)]Dim.3 -82.66 31.52 -2.623 0.0125 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 314.6 on 38 degrees of freedom
## Multiple R-squared: 0.4973, Adjusted R-squared: 0.4708
## F-statistic: 18.79 on 2 and 38 DF, p-value: 2.116e-06
# All themes at once: a single PCA on the whole LDK table.
PCAT<- PCA(LDK)


# Principal component regression of the rent on the coordinates stored in
# PCAT$ind$coord.
pcrT<- lm(logtsDK$Loyer~PCAT$ind$coord,data= as.data.frame(logtsDK))
summary(pcrT)
##
## Call:
## lm(formula = logtsDK$Loyer ~ PCAT$ind$coord, data = as.data.frame(logtsDK))
##
## Residuals:
## Min 1Q Median 3Q Max
## -274.66 -96.46 -31.16 71.71 845.08
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 356.05 33.83 10.524 2.19e-12 ***
## PCAT$ind$coordDim.1 78.17 8.14 9.603 2.42e-11 ***
## PCAT$ind$coordDim.2 -63.89 13.96 -4.578 5.70e-05 ***
## PCAT$ind$coordDim.3 23.61 15.09 1.565 0.12655
## PCAT$ind$coordDim.4 -12.38 15.83 -0.782 0.43931
## PCAT$ind$coordDim.5 48.99 17.15 2.856 0.00718 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 216.6 on 35 degrees of freedom
## Multiple R-squared: 0.7804, Adjusted R-squared: 0.749
## F-statistic: 24.88 on 5 and 35 DF, p-value: 1.295e-10
## Rent model on all themes together, restricted to components 1, 2 and 5 of
## PCAT (columns 1, 2 and 5 of PCAT$ind$coord):
MCO_T<- lm(logtsDK$Loyer~PCAT$ind$coord[,c(1,2,5)],data= as.data.frame(logtsDK))
summary(MCO_T)
##
## Call:
## lm(formula = logtsDK$Loyer ~ PCAT$ind$coord[, c(1, 2, 5)], data = as.data.frame(logtsDK))
##
## Residuals:
## Min 1Q Median 3Q Max
## -288.35 -99.20 -28.09 59.82 885.65
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 356.049 34.315 10.376 1.66e-12 ***
## PCAT$ind$coord[, c(1, 2, 5)]Dim.1 78.171 8.256 9.468 1.99e-11 ***
## PCAT$ind$coord[, c(1, 2, 5)]Dim.2 -63.893 14.155 -4.514 6.26e-05 ***
## PCAT$ind$coord[, c(1, 2, 5)]Dim.5 48.986 17.399 2.815 0.00776 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 219.7 on 37 degrees of freedom
## Multiple R-squared: 0.7612, Adjusted R-squared: 0.7418
## F-statistic: 39.31 on 3 and 37 DF, p-value: 1.356e-11
#Coefficients des variables reconstitués selon la RCP :
##Classification sur variables:
library(ClustOfVar)
# Rebuild the full predictor table (size block + indicator columns).
LDK <- cbind(logtsDK_taille, logtsDKnomIndic)

# Standardise every column. scale() divides by the sample standard deviation
# (n - 1 denominator); multiplying by sqrt(n / (n - 1)) converts that into a
# division by the population (1/n) standard deviation. The factor is derived
# from the number of rows instead of the former hard-coded sqrt(81/82), whose
# 82 matches the number of columns of this table rather than its row count.
# The factor is identical for all columns, so it rescales every distance
# uniformly and does not alter the order in which the tree merges.
LDK_cr <- scale(LDK) * sqrt(nrow(LDK) / (nrow(LDK) - 1))

# Hierarchical clustering with Ward's linkage. dist() measures distances
# between the ROWS of LDK_cr, so the tree groups observations, not variables.
# NOTE(review): "ward.D" is applied to unsquared Euclidean distances; the
# hclust documentation states that "ward.D2" is the variant implementing
# Ward's criterion on such input. Kept as is to preserve the results.
dv <- dist(LDK_cr, method = "euclidean")
CAH <- hclust(d = dv, method = "ward.D")

## Dendrogram
plot(CAH)

## Cut the tree into k = 2 classes:
PV2 <- cutree(tree = CAH, k = 2)

# Helper: R^2 of each column of LDK_cr regressed on the class factor built from
# the labels `PV`. Returns one value per column of LDK_cr.
r2_par_colonne <- function(PV) {
  vapply(
    seq_len(ncol(LDK_cr)),
    function(j) summary(lm(LDK_cr[, j] ~ as.factor(PV)))$r.squared,
    numeric(1)
  )
}

## R^2 of every column against the 2-class partition, kept as a one-column
## matrix as before:
R2_PV2 <- cbind(r2_par_colonne(PV2))
## R^2 of the partition = mean of the per-column R^2:
R2P_PV2 <- mean(R2_PV2)

## Same computation for k = 2, ..., 9 classes: V[i] holds the R^2 of the
## partition into i + 1 classes. The mean is now taken once per partition
## instead of being recomputed on every pass of the inner loop.
V <- rep(0, 8)
for (i in 1:8) {
  PV <- cutree(tree = CAH, k = (i + 1))
  R2_PV <- cbind(r2_par_colonne(PV))
  V[i] <- mean(R2_PV)
}
## Warning in summary.lm(lm(LDK_cr[, j] ~ as.factor(PV))): essentially perfect fit:
## summary may be unreliable
V
## [1] 0.1457266 0.2215363 0.2802125 0.3256438 0.3703531 0.4066013 0.4389535
## [8] 0.4682740
##Le vecteur V représenté ci-dessus contient les R^2 de chaque partition
##D'après le vecteur V, qui contient le R^2 de chaque partition (de k = 2 à k = 9 classes), on peut choisir une partition en 7 classes.
##En effet, le gain de R^2 obtenu en passant de 6 à 7 classes (environ 0,036) est plus important que celui obtenu en passant de 7 à 8 classes (environ 0,032).
## Inertia jumps: merge heights of the tree, largest first.
inertie <- sort(CAH$height, decreasing = TRUE)
# Draw the step plot of the 20 largest heights once (the identical plot() call
# used to be issued twice in a row).
plot(inertie[1:20], type = "s", xlab = "Nombre de classes", ylab = "Inertie")
# Highlight the 2nd and 4th largest heights.
points(c(2, 4), inertie[c(2, 4)], col = c("green3", "red3"), cex = 2, lwd = 3)

## Description of the partition into 4 classes:
P4 <- cutree(tree = CAH,k=4)
# NOTE(review): P4 holds class labels, so summary() prints quantiles of the
# label numbers; a frequency table such as table(P4) would describe the class
# sizes instead.
summary(P4)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 2.927 4.000 4.000
# PLS regression of column 2 of logtsDK on all columns of LDK, with
# leave-one-out cross-validation.
LDKpls<-plsr(as.matrix(logtsDK[,2])~as.matrix(LDK),validation="LOO")
# Cross-validated PRESS for each number of components; in the printed values
# the smallest PRESS is reached at 4 components.
LDKpls$validation$PRESS
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps
## as.matrix(logtsDK[, 2]) 3201973 3706750 3309537 2994531 3232933 3617638 4084327
## 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
## as.matrix(logtsDK[, 2]) 4048026 3973410 4052785 4239620 4859660 5057962
## 14 comps 15 comps 16 comps 17 comps 18 comps 19 comps
## as.matrix(logtsDK[, 2]) 5509839 6006132 6228561 6465885 6777863 6985848
## 20 comps 21 comps 22 comps 23 comps 24 comps 25 comps
## as.matrix(logtsDK[, 2]) 7051845 7081969 7082304 7055691 7041062 7032221
## 26 comps 27 comps 28 comps 29 comps 30 comps 31 comps
## as.matrix(logtsDK[, 2]) 7023471 7009713 6998962 6992518 6992701 6992704
## 32 comps 33 comps 34 comps 35 comps 36 comps 37 comps
## as.matrix(logtsDK[, 2]) 6992663 6992626 6992625 6992624 6992624 6992624
## 38 comps 39 comps
## as.matrix(logtsDK[, 2]) 6992624 6992624
barplot(LDKpls$validation$PRESS)

plot(LDKpls)

## Refit with the retained number of components (here ncomp = 1, no validation):
LDKpls1 <- plsr(as.matrix(logtsDK[,2]) ~as.matrix(LDK), ncomp=1)
# Correlation between the observed response and the 1-component fitted values.
cor(logtsDK[,2],LDKpls1$fitted.values[,1,1])
## [1] 0.803004
# Its square, i.e. the in-sample R^2 of the 1-component fit.
cor(logtsDK[,2],LDKpls1$fitted.values[,1,1])^2
## [1] 0.6448155
plot(LDKpls1)

## Switch to the log of the response to predict (here, the rent):
# Same PLS model on log(column 2), with leave-one-out cross-validation.
LDKLogYpls <- plsr(as.matrix(log(logtsDK[,2]))~ as.matrix(LDK),validation = "LOO")
plot(LDKLogYpls)

# One-component fit without validation; it is reused further down to interpret
# the first component.
LDKLogYpls1 <- plsr(as.matrix(log(logtsDK[,2])) ~as.matrix(LDK),ncomp=1)
# Cross-validated PRESS of the log model for each number of components.
barplot(LDKLogYpls$validation$PRESS)

# Cross-validated RMSEP curve of the log model.
plot(RMSEP(LDKLogYpls), legendpos ="topright")

summary(LDKLogYpls)
## Data: X dimension: 41 82
## Y dimension: 41 1
## Fit method: kernelpls
## Number of components considered: 39
##
## VALIDATION: RMSEP
## Cross-validated using 41 leave-one-out segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 1.208 0.7991 0.7299 0.6763 0.3805 0.3095 0.3121
## adjCV 1.208 0.7985 0.7294 0.6696 0.3786 0.3074 0.3107
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
## CV 0.3284 0.3356 0.3337 0.3352 0.3376 0.3433 0.3516
## adjCV 0.3263 0.3323 0.3303 0.3319 0.3340 0.3392 0.3474
## 14 comps 15 comps 16 comps 17 comps 18 comps 19 comps 20 comps
## CV 0.3541 0.3489 0.3428 0.3362 0.3343 0.3339 0.3340
## adjCV 0.3498 0.3447 0.3386 0.3321 0.3302 0.3298 0.3299
## 21 comps 22 comps 23 comps 24 comps 25 comps 26 comps 27 comps
## CV 0.3350 0.3367 0.3371 0.3371 0.3372 0.3373 0.3374
## adjCV 0.3309 0.3326 0.3330 0.3330 0.3331 0.3331 0.3333
## 28 comps 29 comps 30 comps 31 comps 32 comps 33 comps 34 comps
## CV 0.3375 0.3375 0.3375 0.3375 0.3375 0.3375 0.3375
## adjCV 0.3333 0.3333 0.3333 0.3333 0.3333 0.3333 0.3333
## 35 comps 36 comps 37 comps 38 comps 39 comps
## CV 0.3375 0.3375 0.3375 0.3375 0.3375
## adjCV 0.3333 0.3333 0.3333 0.3333 0.3333
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps
## X 94.21 99.95 99.96 99.99 99.99
## as.matrix(log(logtsDK[, 2])) 59.22 65.68 85.60 94.54 97.49
## 6 comps 7 comps 8 comps 9 comps 10 comps
## X 99.99 99.99 99.99 99.99 99.99
## as.matrix(log(logtsDK[, 2])) 98.14 98.72 99.39 99.57 99.68
## 11 comps 12 comps 13 comps 14 comps 15 comps
## X 99.99 99.99 100.00 100.00 100.00
## as.matrix(log(logtsDK[, 2])) 99.82 99.91 99.95 99.97 99.98
## 16 comps 17 comps 18 comps 19 comps 20 comps
## X 100.00 100.00 100.00 100.00 100
## as.matrix(log(logtsDK[, 2])) 99.98 99.99 99.99 99.99 100
## 21 comps 22 comps 23 comps 24 comps 25 comps
## X 100 100 100 100 100
## as.matrix(log(logtsDK[, 2])) 100 100 100 100 100
## 26 comps 27 comps 28 comps 29 comps 30 comps
## X 100 100 100 100 100
## as.matrix(log(logtsDK[, 2])) 100 100 100 100 100
## 31 comps 32 comps 33 comps 34 comps 35 comps
## X 100 100 100 100 100
## as.matrix(log(logtsDK[, 2])) 100 100 100 100 100
## 36 comps 37 comps 38 comps 39 comps
## X 100 100 100 100
## as.matrix(log(logtsDK[, 2])) 100 100 100 100
## Prediction with 5 components (fit without validation):
LDKLogYpls5 = plsr(as.matrix(log(logtsDK[,2])) ~as.matrix(LDK),ncomp=5)
plot(LDKLogYpls5)

# Correlation between the observed log response and the 5-component fitted values.
cor(log(logtsDK[,2]),LDKLogYpls5$fitted.values[,1,5])
## [1] 0.9873553
# Its square: R^2 computed on the same observations the model was fitted on.
cor(log(logtsDK[,2]),LDKLogYpls5$fitted.values[,1,5])^2
## [1] 0.9748705
## R^2 = 0.975 is obtained; the number of components is the tuning parameter.
# Plot for 5 components of the cross-validated model, with a reference line.
plot(LDKLogYpls, ncomp = 5, line = TRUE)

# Correlations between every column of LDK and the five PLS score vectors.
cor(x=LDK,y=LDKLogYpls5$scores)
## Comp 1 Comp 2 Comp 3 Comp 4
## SurfTerrain 0.990385171 -0.138337005 0.0002398295 -1.663050e-04
## SurfHabitable 0.899451517 0.437019258 -0.0002748108 -7.809467e-04
## SurfPiecResid 0.781935836 0.245471091 -0.3225266785 4.706671e-01
## NbPieces 0.824551638 0.479147822 0.2209499379 -1.310730e-01
## NbPiecesResid 0.774878198 0.531747845 0.1924836334 -2.282014e-01
## NbSDB 0.819733245 0.296924813 0.2283418196 1.282359e-02
## NbChamBur 0.747040090 0.518401539 0.1499996248 -2.888039e-01
## NbSalonsSAM 0.704514616 0.469560994 0.2608879852 -4.135583e-02
## NbWC 0.778695118 0.402030140 0.2117711081 9.608673e-02
## NbCuis 0.330904755 -0.010502152 0.2421217408 -5.424413e-02
## Type.Appart -0.718821245 -0.413137418 0.1973739471 -6.268795e-02
## Type.Villa 0.718821245 0.413137418 -0.1973739471 6.268795e-02
## Standing.Non -0.500981659 -0.109997886 -0.4229686883 -3.248913e-01
## Standing.Oui 0.500981659 0.109997886 0.4229686883 3.248913e-01
## Etat.Bon -0.007531767 0.367046574 0.1712680760 1.549622e-01
## Etat.Mediocre -0.276836129 -0.259233126 -0.3557440952 -1.038299e-01
## Etat.Neuf 0.438593150 -0.142440935 0.1668706209 -8.364715e-02
## Etat.Vetuste -0.148257063 -0.057650699 0.0354129438 7.773829e-05
## Jardin.Non -0.565733717 0.172829170 -0.1195449375 -2.118147e-01
## Jardin.Oui 0.565733717 -0.172829170 0.1195449375 2.118147e-01
## Cour.Non -0.274585332 -0.183944205 -0.2041000168 1.007354e-01
## Cour.Oui 0.274585332 0.183944205 0.2041000168 -1.007354e-01
## Piscine.Non -0.464580446 -0.243198215 0.0694250700 -1.086425e-01
## Piscine.Oui 0.464580446 0.243198215 -0.0694250700 1.086425e-01
## Garage.Non -0.613058918 -0.337048835 -0.1476295320 -2.527746e-01
## Garage.Park -0.151462408 -0.167604062 0.2792691515 1.765326e-01
## Garage.Priv1v 0.349468962 0.256302756 -0.1524947227 2.287816e-01
## Garage.Priv2v 0.589624226 0.324000393 0.1374726965 -1.354518e-01
## Egout.Non 0.475953892 0.297408723 0.0206252914 -3.502615e-02
## Egout.Oui -0.475953892 -0.297408723 -0.0206252914 3.502615e-02
## HiTech.1inst 0.291059952 0.323368977 -0.1178810275 1.222242e-01
## HiTech.2inst 0.617303193 -0.202415682 0.1505408677 -2.536878e-01
## HiTech.Non -0.595130964 -0.178382636 0.0233084470 2.888423e-02
## DistCtrVille.0 -0.140677622 -0.027225363 0.5369006564 2.868468e-01
## DistCtrVille.1a5km 0.033989509 -0.100644522 -0.0246627000 7.288228e-02
## DistCtrVille.inf1km -0.216815170 -0.258112330 -0.0573307347 -1.218231e-01
## DistCtrVille.sup5km 0.211602778 0.326656033 -0.2356661006 -1.496103e-01
## Commerc.inf2km -0.351804766 -0.188096974 0.2426887341 -9.768146e-02
## Commerc.sup2km 0.351804766 0.188096974 -0.2426887341 9.768146e-02
## BordMer.inf2km 0.387925350 0.224403526 0.1553957317 5.972429e-02
## BordMer.sup2km -0.387925350 -0.224403526 -0.1553957317 -5.972429e-02
## Distractions.inf2km 0.284134864 -0.073016655 0.2975377804 4.960380e-01
## Distractions.sup2km -0.284134864 0.073016655 -0.2975377804 -4.960380e-01
## AxeRoutier.inf1km 0.290641094 0.061459852 0.4691600173 3.895813e-01
## AxeRoutier.sup1km -0.290641094 -0.061459852 -0.4691600173 -3.895813e-01
## StandingQuartier.bourge 0.534371455 0.146267931 0.5427083165 2.828916e-01
## StandingQuartier.moy -0.081441591 0.029126777 -0.2250878173 5.573684e-02
## StandingQuartier.popu -0.445495427 -0.168644389 -0.3231266182 -3.256276e-01
## QuartierAffaires.Non 0.192673487 0.070652789 -0.5061387469 -2.535940e-01
## QuartierAffaires.Oui -0.192673487 -0.070652789 0.5061387469 2.535940e-01
## Quartier.Almadies 0.267527027 0.118487272 0.1507590673 -1.561407e-01
## Quartier.BelAir 0.087949304 -0.037963809 -0.0976302333 2.217052e-01
## Quartier.Bopp -0.107717352 -0.054978010 0.0371779190 -2.027142e-01
## Quartier.Castors -0.033012113 0.014718483 -0.0946641387 -2.295350e-01
## Quartier.Colobane -0.124947223 -0.078655490 -0.0427452037 -2.103595e-02
## Quartier.Derkle -0.143361780 -0.104341844 -0.0836473921 -2.669521e-01
## Quartier.Fann -0.081191517 -0.018142966 0.2728999137 1.369320e-01
## Quartier.FannHock 0.079167161 0.241426741 0.2460214936 -5.546517e-02
## Quartier.FannResidence 0.594489969 -0.401145388 0.0594598140 -1.981150e-01
## Quartier.Fass -0.096783516 -0.244324838 -0.3629096549 1.491921e-01
## Quartier.FenetreMermoz -0.086290716 -0.025163659 0.2460369783 8.282680e-02
## Quartier.Foire 0.141188087 0.113702053 -0.1466182475 8.111606e-02
## Quartier.GrandYoff -0.102546036 -0.047712169 -0.0340970914 -4.134315e-02
## Quartier.GueuleTapee -0.091391745 -0.032350741 0.0878362064 -7.545233e-02
## Quartier.Hann 0.086501811 0.051897813 -0.0470829530 1.294282e-01
## Quartier.HLM -0.032403718 0.048477765 -0.0684027811 -1.815278e-01
## Quartier.JetdEau -0.113785193 -0.063247306 0.0643748502 -6.647781e-02
## Quartier.LiberteI -0.147344456 -0.109599829 -0.0738864083 -1.625044e-02
## Quartier.LiberteVI -0.023441992 0.124521350 0.0215211249 -1.342462e-01
## Quartier.Malika -0.071834863 0.059897425 -0.2699835390 -1.199018e-02
## Quartier.Mamelles 0.378104386 0.174901410 -0.0714398841 -2.946188e-03
## Quartier.Medina -0.176064461 -0.108547904 0.1126701146 -1.180343e-02
## Quartier.Mermoz -0.051261025 0.055229138 0.0415570359 3.584298e-01
## Quartier.Ngor 0.119994523 0.209353302 -0.0113251581 9.288067e-02
## Quartier.NiayeCoker -0.124003215 -0.077567116 -0.0392679972 -1.999202e-01
## Quartier.Parcelles 0.021041596 0.437417068 0.0451491306 -3.616002e-01
## Quartier.Pikine -0.124003232 -0.077570455 -0.0756415702 -2.289628e-01
## Quartier.Plateau -0.140677622 -0.027225363 0.5369006564 2.868468e-01
## Quartier.PointE 0.270719303 0.059349496 -0.0044585953 1.355403e-01
## Quartier.SacreCoeur 0.056218556 0.058350958 -0.2360511951 1.946422e-01
## Quartier.SacreCoeurIII 0.007211875 0.002172032 -0.1067888422 4.344549e-02
## Quartier.Yoff -0.173102458 -0.104704562 -0.1478880341 4.256535e-02
## Comp 5
## SurfTerrain 1.777622e-06
## SurfHabitable 3.552860e-04
## SurfPiecResid -3.051740e-02
## NbPieces -9.701728e-02
## NbPiecesResid -4.451819e-02
## NbSDB -2.046653e-01
## NbChamBur -3.740840e-02
## NbSalonsSAM -5.378093e-02
## NbWC -1.391414e-01
## NbCuis -6.374613e-02
## Type.Appart -5.587752e-02
## Type.Villa 5.587752e-02
## Standing.Non 1.147788e-01
## Standing.Oui -1.147788e-01
## Etat.Bon 3.581194e-01
## Etat.Mediocre -2.343795e-01
## Etat.Neuf -4.983829e-02
## Etat.Vetuste -2.166024e-01
## Jardin.Non 2.394500e-01
## Jardin.Oui -2.394500e-01
## Cour.Non -1.761303e-01
## Cour.Oui 1.761303e-01
## Piscine.Non -5.359399e-02
## Piscine.Oui 5.359399e-02
## Garage.Non 3.089216e-01
## Garage.Park -4.778903e-01
## Garage.Priv1v -8.774653e-03
## Garage.Priv2v 6.356304e-02
## Egout.Non 9.494935e-02
## Egout.Oui -9.494935e-02
## HiTech.1inst 5.330116e-02
## HiTech.2inst -4.708349e-02
## HiTech.Non -2.194580e-02
## DistCtrVille.0 4.720810e-01
## DistCtrVille.1a5km -2.346497e-01
## DistCtrVille.inf1km 1.697618e-01
## DistCtrVille.sup5km -1.442871e-01
## Commerc.inf2km 3.365436e-01
## Commerc.sup2km -3.365436e-01
## BordMer.inf2km -2.907157e-01
## BordMer.sup2km 2.907157e-01
## Distractions.inf2km 2.560451e-01
## Distractions.sup2km -2.560451e-01
## AxeRoutier.inf1km -8.672357e-02
## AxeRoutier.sup1km 8.672357e-02
## StandingQuartier.bourge 1.106404e-01
## StandingQuartier.moy -4.002392e-02
## StandingQuartier.popu -7.120123e-02
## QuartierAffaires.Non -5.729654e-01
## QuartierAffaires.Oui 5.729654e-01
## Quartier.Almadies -1.157616e-01
## Quartier.BelAir -1.133132e-01
## Quartier.Bopp 1.296944e-03
## Quartier.Castors 8.434794e-02
## Quartier.Colobane -9.660266e-02
## Quartier.Derkle 4.560586e-02
## Quartier.Fann -1.996885e-01
## Quartier.FannHock -6.093475e-02
## Quartier.FannResidence 5.001309e-02
## Quartier.Fass 3.559837e-02
## Quartier.FenetreMermoz -1.600580e-01
## Quartier.Foire -1.485386e-01
## Quartier.GrandYoff -3.642555e-01
## Quartier.GueuleTapee -1.208158e-01
## Quartier.Hann -1.160514e-01
## Quartier.HLM -5.239876e-02
## Quartier.JetdEau -1.086539e-01
## Quartier.LiberteI 7.222482e-02
## Quartier.LiberteVI -5.278951e-02
## Quartier.Malika -1.027358e-01
## Quartier.Mamelles 1.020130e-01
## Quartier.Medina 1.205860e-01
## Quartier.Mermoz -1.691084e-01
## Quartier.Ngor 1.559469e-01
## Quartier.NiayeCoker 1.585103e-01
## Quartier.Parcelles -1.093101e-01
## Quartier.Pikine 1.575745e-01
## Quartier.Plateau 4.720810e-01
## Quartier.PointE 2.906611e-02
## Quartier.SacreCoeur 2.337879e-01
## Quartier.SacreCoeurIII -8.483835e-02
## Quartier.Yoff 5.377489e-02
## Interpretation of the predictive model built on the first component:
# Regress the log response on the PLS score of the 1-component fit.
regLDKc1 <- lm(log(logtsDK[, 2]) ~ LDKLogYpls1$scores)
# Map the score slope back to one coefficient per predictor. The scores are the
# centred predictors multiplied by `$projection`, so the predictor-space
# coefficients are projection %*% slope. The previous code multiplied the slope
# by `$coefficients`, which already holds predictor-space regression
# coefficients, and therefore applied the slope twice.
LogLoyerModelPLS1 <- LDKLogYpls1$projection %*%
  as.matrix(regLDKc1$coefficients[2])

## Interpretation of the predictive model built on the first 5 components:
regLDKc5 <- lm(log(logtsDK[, 2]) ~ LDKLogYpls5$scores)
# Same reconstruction with the five score slopes (coefficients 2 to 6; the
# first coefficient is the intercept).
LogLoyerModelPLS5 <- LDKLogYpls5$projection %*%
  as.matrix(regLDKc5$coefficients[2:6])
regLDKc5$coefficients[2:6]
## LDKLogYpls5$scoresComp 1 LDKLogYpls5$scoresComp 2 LDKLogYpls5$scoresComp 3
## 0.004253071 0.005680053 0.408712086
## LDKLogYpls5$scoresComp 4 LDKLogYpls5$scoresComp 5
## 0.100948886 0.277579705
# Covariance matrix of the five PLS score vectors; in the printed result the
# off-diagonal entries are numerically zero.
var(LDKLogYpls5$scores)
## Comp 1 Comp 2 Comp 3 Comp 4 Comp 5
## Comp 1 4.657292e+04 3.913470e-12 1.358349e-13 4.725423e-13 -2.217983e-14
## Comp 2 3.913470e-12 2.850608e+03 -1.290101e-14 4.461717e-14 -2.943782e-14
## Comp 3 1.358349e-13 -1.290101e-14 1.696126e+00 1.838755e-15 1.048034e-16
## Comp 4 4.725423e-13 4.461717e-14 1.838755e-15 1.248503e+01 1.778051e-16
## Comp 5 -2.217983e-14 -2.943782e-14 1.048034e-16 1.778051e-16 5.443803e-01
## Ridge regression:
# glmnet() fits penalised regressions and lets the penalty coefficient be
# controlled; alpha = 0 is the setting used here for the ridge fit.
logLoy <- log(logtsDK[, 2])
fit1 <- glmnet(
  x = as.matrix(LDK),
  y = logLoy,
  family = "gaussian",
  alpha = 0
)
print(fit1)
##
## Call: glmnet(x = as.matrix(LDK), y = logLoy, family = "gaussian", alpha = 0)
##
## Df %Dev Lambda
## 1 82 0.00 946.70
## 2 82 3.53 903.70
## 3 82 3.69 862.60
## 4 82 3.86 823.40
## 5 82 4.04 786.00
## 6 82 4.23 750.20
## 7 82 4.42 716.10
## 8 82 4.62 683.60
## 9 82 4.83 652.50
## 10 82 5.05 622.90
## 11 82 5.28 594.60
## 12 82 5.52 567.50
## 13 82 5.77 541.70
## 14 82 6.03 517.10
## 15 82 6.30 493.60
## 16 82 6.58 471.20
## 17 82 6.88 449.80
## 18 82 7.18 429.30
## 19 82 7.50 409.80
## 20 82 7.84 391.20
## 21 82 8.18 373.40
## 22 82 8.54 356.40
## 23 82 8.92 340.20
## 24 82 9.31 324.80
## 25 82 9.71 310.00
## 26 82 10.13 295.90
## 27 82 10.57 282.50
## 28 82 11.03 269.60
## 29 82 11.50 257.40
## 30 82 11.99 245.70
## 31 82 12.50 234.50
## 32 82 13.02 223.80
## 33 82 13.57 213.70
## 34 82 14.13 204.00
## 35 82 14.72 194.70
## 36 82 15.32 185.80
## 37 82 15.95 177.40
## 38 82 16.60 169.30
## 39 82 17.27 161.60
## 40 82 17.96 154.30
## 41 82 18.67 147.30
## 42 82 19.40 140.60
## 43 82 20.16 134.20
## 44 82 20.94 128.10
## 45 82 21.74 122.30
## 46 82 22.57 116.70
## 47 82 23.42 111.40
## 48 82 24.29 106.30
## 49 82 25.19 101.50
## 50 82 26.11 96.90
## 51 82 27.05 92.49
## 52 82 28.01 88.29
## 53 82 28.99 84.28
## 54 82 30.00 80.45
## 55 82 31.03 76.79
## 56 82 32.08 73.30
## 57 82 33.14 69.97
## 58 82 34.23 66.79
## 59 82 35.34 63.75
## 60 82 36.46 60.85
## 61 82 37.60 58.09
## 62 82 38.75 55.45
## 63 82 39.92 52.93
## 64 82 41.11 50.52
## 65 82 42.31 48.23
## 66 82 43.51 46.03
## 67 82 44.73 43.94
## 68 82 45.96 41.94
## 69 82 47.19 40.04
## 70 82 48.43 38.22
## 71 82 49.67 36.48
## 72 82 50.92 34.82
## 73 82 52.16 33.24
## 74 82 53.41 31.73
## 75 82 54.66 30.29
## 76 82 55.90 28.91
## 77 82 57.13 27.60
## 78 82 58.36 26.34
## 79 82 59.58 25.15
## 80 82 60.80 24.00
## 81 82 62.00 22.91
## 82 82 63.19 21.87
## 83 82 64.36 20.88
## 84 82 65.52 19.93
## 85 82 66.67 19.02
## 86 82 67.80 18.16
## 87 82 68.91 17.33
## 88 82 70.00 16.54
## 89 82 71.07 15.79
## 90 82 72.12 15.07
## 91 82 73.14 14.39
## 92 82 74.15 13.73
## 93 82 75.13 13.11
## 94 82 76.09 12.51
## 95 82 77.03 11.95
## 96 82 77.94 11.40
## 97 82 78.82 10.88
## 98 82 79.69 10.39
## 99 82 80.52 9.92
## 100 82 81.34 9.47
## Evolution of the coefficients as lambda increases:
plot(fit1, xvar = "lambda")

## Choice of lambda by cross-validation
cvfit1 <- cv.glmnet(
  x = as.matrix(LDK),
  y = logLoy,
  family = "gaussian",
  alpha = 0
)
plot(cvfit1) ## Curve of log(lambda) vs MSE

# Minimum value of the MSE (in cross-validation)
print(min(cvfit1$cvm))
## [1] 0.3579884
## Corresponding lambda
print(cvfit1$lambda.min)
## [1] 9.466981
## Refit the ridge regression at the best lambda.
# The value is read from cvfit1 instead of being pasted from a previous
# run's printout: no fold assignment is fixed in the cv.glmnet call, so the
# selected lambda may differ from one run to the next.
# NOTE(review): the glmnet documentation advises against fitting a single
# lambda; coef(cvfit1, s = "lambda.min") is the path-based alternative if
# the `fit` object itself is not needed downstream.
fit <- glmnet(
  x = as.matrix(LDK),
  y = logLoy,
  family = "gaussian",
  alpha = 0,
  lambda = cvfit1$lambda.min
)
# Coefficients of the resulting model:
coef(fit)
## 83 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 4.5504571462
## SurfTerrain 0.0001847839
## SurfHabitable 0.0003612079
## SurfPiecResid 0.0045285472
## NbPieces 0.0087914863
## NbPiecesResid 0.0122634696
## NbSDB 0.0477620380
## NbChamBur 0.0148572778
## NbSalonsSAM 0.0419043836
## NbWC 0.0461536698
## NbCuis 0.0837538080
## Type.Appart -0.0628372650
## Type.Villa 0.0628458655
## Standing.Non -0.0651306722
## Standing.Oui 0.0651397914
## Etat.Bon 0.0377067427
## Etat.Mediocre -0.0643994251
## Etat.Neuf 0.0383080260
## Etat.Vetuste -0.0324734934
## Jardin.Non -0.0494702152
## Jardin.Oui 0.0494770517
## Cour.Non -0.0806814458
## Cour.Oui 0.0806841932
## Piscine.Non -0.0506623728
## Piscine.Oui 0.0506725636
## Garage.Non -0.0693591791
## Garage.Park 0.0028997551
## Garage.Priv1v 0.0388881931
## Garage.Priv2v 0.0751981783
## Egout.Non 0.0485229866
## Egout.Oui -0.0485280212
## HiTech.1inst 0.0221007368
## HiTech.2inst 0.0773392132
## HiTech.Non -0.0404310853
## DistCtrVille.0 0.1134978890
## DistCtrVille.1a5km -0.0020987183
## DistCtrVille.inf1km -0.0334663693
## DistCtrVille.sup5km -0.0144515885
## Commerc.inf2km 0.0046179802
## Commerc.sup2km -0.0046187218
## BordMer.inf2km 0.0337037251
## BordMer.sup2km -0.0337032019
## Distractions.inf2km 0.0732295375
## Distractions.sup2km -0.0732299978
## AxeRoutier.inf1km 0.0629503755
## AxeRoutier.sup1km -0.0629492205
## StandingQuartier.bourge 0.0957790192
## StandingQuartier.moy -0.0183825207
## StandingQuartier.popu -0.0753417853
## QuartierAffaires.Non -0.0780218240
## QuartierAffaires.Oui 0.0780225067
## Quartier.Almadies 0.0305840265
## Quartier.BelAir 0.0186856223
## Quartier.Bopp -0.0736677972
## Quartier.Castors -0.0256732819
## Quartier.Colobane -0.0744034727
## Quartier.Derkle -0.0700682076
## Quartier.Fann 0.0698208589
## Quartier.FannHock 0.0791635446
## Quartier.FannResidence 0.1202105620
## Quartier.Fass -0.0573161363
## Quartier.FenetreMermoz 0.0262481935
## Quartier.Foire -0.0111182717
## Quartier.GrandYoff -0.0842374423
## Quartier.GueuleTapee -0.0226322829
## Quartier.Hann 0.0158698076
## Quartier.HLM -0.0117097743
## Quartier.JetdEau -0.0767583908
## Quartier.LiberteI -0.0725814026
## Quartier.LiberteVI -0.0203300633
## Quartier.Malika -0.0790798776
## Quartier.Mamelles 0.0483702326
## Quartier.Medina -0.0294313553
## Quartier.Mermoz 0.0303892493
## Quartier.Ngor 0.0546192055
## Quartier.NiayeCoker -0.0401851870
## Quartier.Parcelles -0.0238868286
## Quartier.Pikine -0.0373187607
## Quartier.Plateau 0.1134952551
## Quartier.PointE 0.0561492437
## Quartier.SacreCoeur 0.0464885059
## Quartier.SacreCoeurIII -0.0183166801
## Quartier.Yoff -0.0438516877
# Largest lambda whose cross-validated MSE is still below the upper bound
# of the interval around min(MSE).
cvfit1[["lambda.1se"]]
## [1] 12.51479
## Here R^2 = 0.81 vs R^2 = 0.97 for PLS with 5 components
# Ridge coefficients at the last step of the fitted path. The column index is
# taken from the object rather than hard-coded as 100, so the line still works
# if the path holds a different number of lambda values.
fit1$beta[, ncol(fit1$beta)]
## SurfTerrain SurfHabitable SurfPiecResid
## 0.0001849058 0.0003614481 0.0045305744
## NbPieces NbPiecesResid NbSDB
## 0.0087954064 0.0122674857 0.0477684989
## NbChamBur NbSalonsSAM NbWC
## 0.0148582520 0.0419034645 0.0461485294
## NbCuis Type.Appart Type.Villa
## 0.0837439606 -0.0628236394 0.0628294111
## Standing.Non Standing.Oui Etat.Bon
## -0.0651229391 0.0651265687 0.0377033877
## Etat.Mediocre Etat.Neuf Etat.Vetuste
## -0.0643893009 0.0382952106 -0.0324671938
## Jardin.Non Jardin.Oui Cour.Non
## -0.0494561339 0.0494613569 -0.0806689700
## Cour.Oui Piscine.Non Piscine.Oui
## 0.0806730621 -0.0506404096 0.0506487428
## Garage.Non Garage.Park Garage.Priv1v
## -0.0693381409 0.0029068701 0.0388790812
## Garage.Priv2v Egout.Non Egout.Oui
## 0.0751685591 0.0484981155 -0.0485062883
## HiTech.1inst HiTech.2inst HiTech.Non
## 0.0220888837 0.0773116813 -0.0404177032
## DistCtrVille.0 DistCtrVille.1a5km DistCtrVille.inf1km
## 0.1135000405 -0.0020991186 -0.0334554875
## DistCtrVille.sup5km Commerc.inf2km Commerc.sup2km
## -0.0144577215 0.0046261711 -0.0046236760
## BordMer.inf2km BordMer.sup2km Distractions.inf2km
## 0.0336949479 -0.0336974142 0.0732230848
## Distractions.sup2km AxeRoutier.inf1km AxeRoutier.sup1km
## -0.0732246821 0.0629453272 -0.0629462704
## StandingQuartier.bourge StandingQuartier.moy StandingQuartier.popu
## 0.0957745461 -0.0183845305 -0.0753407843
## QuartierAffaires.Non QuartierAffaires.Oui Quartier.Almadies
## -0.0780265142 0.0780252802 0.0305948912
## Quartier.BelAir Quartier.Bopp Quartier.Castors
## 0.0186852685 -0.0736694215 -0.0256784523
## Quartier.Colobane Quartier.Derkle Quartier.Fann
## -0.0744036064 -0.0700699173 0.0698289656
## Quartier.FannHock Quartier.FannResidence Quartier.Fass
## 0.0791599623 0.1201991115 -0.0573199056
## Quartier.FenetreMermoz Quartier.Foire Quartier.GrandYoff
## 0.0262519457 -0.0111197092 -0.0842375595
## Quartier.GueuleTapee Quartier.Hann Quartier.HLM
## -0.0226370166 0.0158725258 -0.0117135723
## Quartier.JetdEau Quartier.LiberteI Quartier.LiberteVI
## -0.0767487326 -0.0725753958 -0.0203350243
## Quartier.Malika Quartier.Mamelles Quartier.Medina
## -0.0790798950 0.0483744096 -0.0294323851
## Quartier.Mermoz Quartier.Ngor Quartier.NiayeCoker
## 0.0303927141 0.0546292303 -0.0401868043
## Quartier.Parcelles Quartier.Pikine Quartier.Plateau
## -0.0238978848 -0.0373183092 0.1134963241
## Quartier.PointE Quartier.SacreCoeur Quartier.SacreCoeurIII
## 0.0561455048 0.0464827684 -0.0183167663
## Quartier.Yoff
## -0.0438491545
## Comparison of the PLS and ridge coefficients:
# Ridge coefficients at the last step of the fitted path; the column index is
# derived from the object instead of assuming exactly 100 lambda values.
fit1$beta[, ncol(fit1$beta)]
## SurfTerrain SurfHabitable SurfPiecResid
## 0.0001849058 0.0003614481 0.0045305744
## NbPieces NbPiecesResid NbSDB
## 0.0087954064 0.0122674857 0.0477684989
## NbChamBur NbSalonsSAM NbWC
## 0.0148582520 0.0419034645 0.0461485294
## NbCuis Type.Appart Type.Villa
## 0.0837439606 -0.0628236394 0.0628294111
## Standing.Non Standing.Oui Etat.Bon
## -0.0651229391 0.0651265687 0.0377033877
## Etat.Mediocre Etat.Neuf Etat.Vetuste
## -0.0643893009 0.0382952106 -0.0324671938
## Jardin.Non Jardin.Oui Cour.Non
## -0.0494561339 0.0494613569 -0.0806689700
## Cour.Oui Piscine.Non Piscine.Oui
## 0.0806730621 -0.0506404096 0.0506487428
## Garage.Non Garage.Park Garage.Priv1v
## -0.0693381409 0.0029068701 0.0388790812
## Garage.Priv2v Egout.Non Egout.Oui
## 0.0751685591 0.0484981155 -0.0485062883
## HiTech.1inst HiTech.2inst HiTech.Non
## 0.0220888837 0.0773116813 -0.0404177032
## DistCtrVille.0 DistCtrVille.1a5km DistCtrVille.inf1km
## 0.1135000405 -0.0020991186 -0.0334554875
## DistCtrVille.sup5km Commerc.inf2km Commerc.sup2km
## -0.0144577215 0.0046261711 -0.0046236760
## BordMer.inf2km BordMer.sup2km Distractions.inf2km
## 0.0336949479 -0.0336974142 0.0732230848
## Distractions.sup2km AxeRoutier.inf1km AxeRoutier.sup1km
## -0.0732246821 0.0629453272 -0.0629462704
## StandingQuartier.bourge StandingQuartier.moy StandingQuartier.popu
## 0.0957745461 -0.0183845305 -0.0753407843
## QuartierAffaires.Non QuartierAffaires.Oui Quartier.Almadies
## -0.0780265142 0.0780252802 0.0305948912
## Quartier.BelAir Quartier.Bopp Quartier.Castors
## 0.0186852685 -0.0736694215 -0.0256784523
## Quartier.Colobane Quartier.Derkle Quartier.Fann
## -0.0744036064 -0.0700699173 0.0698289656
## Quartier.FannHock Quartier.FannResidence Quartier.Fass
## 0.0791599623 0.1201991115 -0.0573199056
## Quartier.FenetreMermoz Quartier.Foire Quartier.GrandYoff
## 0.0262519457 -0.0111197092 -0.0842375595
## Quartier.GueuleTapee Quartier.Hann Quartier.HLM
## -0.0226370166 0.0158725258 -0.0117135723
## Quartier.JetdEau Quartier.LiberteI Quartier.LiberteVI
## -0.0767487326 -0.0725753958 -0.0203350243
## Quartier.Malika Quartier.Mamelles Quartier.Medina
## -0.0790798950 0.0483744096 -0.0294323851
## Quartier.Mermoz Quartier.Ngor Quartier.NiayeCoker
## 0.0303927141 0.0546292303 -0.0401868043
## Quartier.Parcelles Quartier.Pikine Quartier.Plateau
## -0.0238978848 -0.0373183092 0.1134963241
## Quartier.PointE Quartier.SacreCoeur Quartier.SacreCoeurIII
## 0.0561455048 0.0464827684 -0.0183167663
## Quartier.Yoff
## -0.0438491545
## LASSO:
# Same call as for the ridge path, with alpha = 1.
fit2 <- glmnet(
  x = as.matrix(LDK),
  y = logLoy,
  family = "gaussian",
  alpha = 1
)
fit2
##
## Call: glmnet(x = as.matrix(LDK), y = logLoy, family = "gaussian", alpha = 1)
##
## Df %Dev Lambda
## 1 0 0.00 0.94670
## 2 3 6.34 0.90370
## 3 3 13.15 0.86260
## 4 3 19.36 0.82340
## 5 3 25.02 0.78600
## 6 4 30.18 0.75020
## 7 4 34.89 0.71610
## 8 4 39.17 0.68360
## 9 4 43.08 0.65250
## 10 4 46.65 0.62290
## 11 4 49.89 0.59460
## 12 4 52.85 0.56750
## 13 4 55.54 0.54170
## 14 4 58.00 0.51710
## 15 5 60.27 0.49360
## 16 5 62.37 0.47120
## 17 5 64.27 0.44980
## 18 5 66.01 0.42930
## 19 6 67.62 0.40980
## 20 7 69.25 0.39120
## 21 7 70.76 0.37340
## 22 7 72.13 0.35640
## 23 8 73.39 0.34020
## 24 8 74.88 0.32480
## 25 10 76.28 0.31000
## 26 10 77.59 0.29590
## 27 10 78.81 0.28250
## 28 12 80.22 0.26960
## 29 12 81.56 0.25740
## 30 13 82.81 0.24570
## 31 13 83.96 0.23450
## 32 13 85.01 0.22380
## 33 13 85.96 0.21370
## 34 15 86.85 0.20400
## 35 15 87.83 0.19470
## 36 16 88.70 0.18580
## 37 16 89.52 0.17740
## 38 17 90.26 0.16930
## 39 17 90.94 0.16160
## 40 17 91.56 0.15430
## 41 18 92.13 0.14730
## 42 17 92.67 0.14060
## 43 17 93.16 0.13420
## 44 17 93.60 0.12810
## 45 17 94.01 0.12230
## 46 18 94.37 0.11670
## 47 17 94.71 0.11140
## 48 17 95.02 0.10630
## 49 17 95.29 0.10150
## 50 18 95.55 0.09690
## 51 18 95.78 0.09249
## 52 17 95.99 0.08829
## 53 17 96.18 0.08428
## 54 17 96.36 0.08045
## 55 18 96.52 0.07679
## 56 17 96.66 0.07330
## 57 18 96.80 0.06997
## 58 19 96.92 0.06679
## 59 19 97.03 0.06375
## 60 18 97.13 0.06085
## 61 20 97.23 0.05809
## 62 21 97.33 0.05545
## 63 21 97.43 0.05293
## 64 21 97.52 0.05052
## 65 21 97.60 0.04823
## 66 21 97.67 0.04603
## 67 22 97.74 0.04394
## 68 23 97.82 0.04194
## 69 23 97.90 0.04004
## 70 23 97.97 0.03822
## 71 25 98.06 0.03648
## 72 25 98.16 0.03482
## 73 28 98.25 0.03324
## 74 29 98.34 0.03173
## 75 29 98.43 0.03029
## 76 29 98.51 0.02891
## 77 30 98.60 0.02760
## 78 30 98.67 0.02634
## 79 32 98.75 0.02515
## 80 32 98.83 0.02400
## 81 32 98.90 0.02291
## 82 33 98.96 0.02187
## 83 35 99.02 0.02088
## 84 35 99.09 0.01993
## 85 35 99.14 0.01902
## 86 36 99.20 0.01816
## 87 37 99.25 0.01733
## 88 37 99.31 0.01654
## 89 36 99.35 0.01579
## 90 37 99.39 0.01507
## 91 38 99.42 0.01439
## 92 38 99.46 0.01373
## 93 39 99.49 0.01311
## 94 40 99.52 0.01251
## 95 40 99.55 0.01195
## 96 41 99.59 0.01140
## 97 41 99.61 0.01088
## 98 41 99.64 0.01039
## 99 41 99.67 0.00992
## 100 41 99.69 0.00947
# Coefficient paths of the lasso fit against log(lambda).
plot(fit2, xvar = "lambda")

# Cross-validation over the lasso path.
cvfit2 <- cv.glmnet(
  x = as.matrix(LDK),
  y = logLoy,
  family = "gaussian",
  alpha = 1
)
plot(cvfit2) ## Curve of log(lambda) vs MSE

# Minimum value of the MSE (in cross-validation)
min(cvfit2$cvm)
## [1] 0.1120604
## Corresponding lambda: the value at which the cross-validated MSE is
## minimal. min(cvfit2$lambda) would only return the smallest lambda of the
## grid, which is not necessarily where the CV error is lowest.
cvfit2$lambda.min
## [1] 0.009466981
## Refit the lasso regression at the best lambda.
# The value is read from cvfit2 instead of a rounded literal copied from an
# earlier printout, so the refit always matches the cross-validation result
# of the current run.
# NOTE(review): the glmnet documentation advises against fitting a single
# lambda; coef(cvfit2, s = "lambda.min") is the path-based alternative if
# the `fit_new` object itself is not needed downstream.
fit_new <- glmnet(
  x = as.matrix(LDK),
  y = logLoy,
  family = "gaussian",
  alpha = 1,
  lambda = cvfit2$lambda.min
)
# Coefficients of the resulting model:
coef(fit_new)
## 83 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 3.817206e+00
## SurfTerrain 3.689737e-04
## SurfHabitable .
## SurfPiecResid 1.271086e-02
## NbPieces 2.444574e-02
## NbPiecesResid .
## NbSDB 9.525848e-02
## NbChamBur .
## NbSalonsSAM 9.199129e-02
## NbWC 4.842380e-02
## NbCuis 2.827871e-02
## Type.Appart -2.507841e-01
## Type.Villa .
## Standing.Non .
## Standing.Oui .
## Etat.Bon 1.140692e-03
## Etat.Mediocre -1.947687e-01
## Etat.Neuf .
## Etat.Vetuste .
## Jardin.Non -7.175928e-02
## Jardin.Oui 1.122933e-16
## Cour.Non -7.461867e-01
## Cour.Oui 7.690091e-14
## Piscine.Non .
## Piscine.Oui .
## Garage.Non -3.349738e-01
## Garage.Park 4.319967e-02
## Garage.Priv1v .
## Garage.Priv2v .
## Egout.Non .
## Egout.Oui .
## HiTech.1inst .
## HiTech.2inst .
## HiTech.Non .
## DistCtrVille.0 6.827965e-01
## DistCtrVille.1a5km .
## DistCtrVille.inf1km .
## DistCtrVille.sup5km .
## Commerc.inf2km 1.486377e-01
## Commerc.sup2km -1.086251e-16
## BordMer.inf2km 3.063656e-02
## BordMer.sup2km .
## Distractions.inf2km 5.182085e-01
## Distractions.sup2km .
## AxeRoutier.inf1km 1.940270e-01
## AxeRoutier.sup1km .
## StandingQuartier.bourge 4.561529e-01
## StandingQuartier.moy .
## StandingQuartier.popu -1.801859e-01
## QuartierAffaires.Non -3.708936e-03
## QuartierAffaires.Oui .
## Quartier.Almadies .
## Quartier.BelAir .
## Quartier.Bopp .
## Quartier.Castors 4.629276e-02
## Quartier.Colobane -1.337399e-01
## Quartier.Derkle 5.411992e-01
## Quartier.Fann 3.776828e-02
## Quartier.FannHock .
## Quartier.FannResidence 2.513412e-01
## Quartier.Fass -7.003918e-03
## Quartier.FenetreMermoz .
## Quartier.Foire .
## Quartier.GrandYoff -2.926742e-01
## Quartier.GueuleTapee .
## Quartier.Hann .
## Quartier.HLM 9.730112e-02
## Quartier.JetdEau -1.967823e-01
## Quartier.LiberteI .
## Quartier.LiberteVI -1.258936e-01
## Quartier.Malika .
## Quartier.Mamelles .
## Quartier.Medina .
## Quartier.Mermoz .
## Quartier.Ngor .
## Quartier.NiayeCoker .
## Quartier.Parcelles .
## Quartier.Pikine 2.613676e-01
## Quartier.Plateau 1.072528e-01
## Quartier.PointE -8.564901e-02
## Quartier.SacreCoeur .
## Quartier.SacreCoeurIII -5.259822e-02
## Quartier.Yoff .
## We obtain an R^2 of 0.99 (vs R^2 = 0.81 for ridge and R^2 = 0.97 for PLS
## with 5 components)
# Lasso coefficients at the last step of the fitted path. The column index is
# taken from the object rather than hard-coded as 100, so the line still works
# if the path holds a different number of lambda values.
fit2$beta[, ncol(fit2$beta)]
## SurfTerrain SurfHabitable SurfPiecResid
## 6.004233e-04 0.000000e+00 1.127415e-02
## NbPieces NbPiecesResid NbSDB
## 2.253262e-02 0.000000e+00 5.689767e-02
## NbChamBur NbSalonsSAM NbWC
## 0.000000e+00 1.164135e-01 6.307075e-02
## NbCuis Type.Appart Type.Villa
## 1.731411e-02 -2.327086e-01 2.109452e-13
## Standing.Non Standing.Oui Etat.Bon
## 0.000000e+00 0.000000e+00 2.098806e-02
## Etat.Mediocre Etat.Neuf Etat.Vetuste
## -1.601989e-01 0.000000e+00 0.000000e+00
## Jardin.Non Jardin.Oui Cour.Non
## -4.312547e-02 1.531322e-13 -7.045781e-01
## Cour.Oui Piscine.Non Piscine.Oui
## 3.519603e-13 0.000000e+00 0.000000e+00
## Garage.Non Garage.Park Garage.Priv1v
## -3.619065e-01 4.366410e-02 0.000000e+00
## Garage.Priv2v Egout.Non Egout.Oui
## 0.000000e+00 0.000000e+00 0.000000e+00
## HiTech.1inst HiTech.2inst HiTech.Non
## 0.000000e+00 0.000000e+00 0.000000e+00
## DistCtrVille.0 DistCtrVille.1a5km DistCtrVille.inf1km
## 7.709611e-01 0.000000e+00 0.000000e+00
## DistCtrVille.sup5km Commerc.inf2km Commerc.sup2km
## 0.000000e+00 1.718972e-01 0.000000e+00
## BordMer.inf2km BordMer.sup2km Distractions.inf2km
## 6.971288e-03 0.000000e+00 4.863005e-01
## Distractions.sup2km AxeRoutier.inf1km AxeRoutier.sup1km
## -1.296599e-03 2.123460e-01 0.000000e+00
## StandingQuartier.bourge StandingQuartier.moy StandingQuartier.popu
## 4.484426e-01 0.000000e+00 -1.799222e-01
## QuartierAffaires.Non QuartierAffaires.Oui Quartier.Almadies
## 0.000000e+00 0.000000e+00 0.000000e+00
## Quartier.BelAir Quartier.Bopp Quartier.Castors
## 0.000000e+00 -5.675849e-02 4.125753e-02
## Quartier.Colobane Quartier.Derkle Quartier.Fann
## -1.623327e-01 4.633412e-01 8.387169e-02
## Quartier.FannHock Quartier.FannResidence Quartier.Fass
## 0.000000e+00 1.743841e-01 -3.304288e-02
## Quartier.FenetreMermoz Quartier.Foire Quartier.GrandYoff
## 0.000000e+00 0.000000e+00 -3.214242e-01
## Quartier.GueuleTapee Quartier.Hann Quartier.HLM
## 0.000000e+00 0.000000e+00 4.899427e-02
## Quartier.JetdEau Quartier.LiberteI Quartier.LiberteVI
## -2.288655e-01 0.000000e+00 -1.151605e-01
## Quartier.Malika Quartier.Mamelles Quartier.Medina
## 0.000000e+00 0.000000e+00 0.000000e+00
## Quartier.Mermoz Quartier.Ngor Quartier.NiayeCoker
## 4.865839e-03 0.000000e+00 0.000000e+00
## Quartier.Parcelles Quartier.Pikine Quartier.Plateau
## 0.000000e+00 2.399992e-01 1.122180e-02
## Quartier.PointE Quartier.SacreCoeur Quartier.SacreCoeurIII
## -1.069969e-01 0.000000e+00 -6.740337e-02
## Quartier.Yoff
## 0.000000e+00